home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Shareware Grab Bag
/
Shareware Grab Bag.iso
/
002
/
bigsort.arc
/
BIGSORT.PAS
< prev
Wrap
Pascal/Delphi Source File
|
1985-09-22
|
15KB
|
461 lines
{$C-}
{$G512}
{$P512}
PROGRAM bigsort(Input, Output);
{*************************************************************************}
{* Copyright (c) Kim Kokkonen, TurboPower Software, 1985 *}
{* Released to the public domain for personal, non-commercial use only *}
{*************************************************************************}
{ sort as large a text file as fits in memory }
{ designed as a filter, requires Turbo Pascal 3.0 to compile }
{ written 7/85, phone 408-378-3672 }
{ see options in WRITEHELP, call BIGSORT with no arguments to list options}
{ sorts more than 3x faster than MSDOS SORT for large files }
{ includes a RANDOMIZE feature that aids in sorting presorted files }
{ compile with maximum heap size A000 }
CONST
maxfile = 15000; {max number of lines in input file.
limited by 4*maxfile<65000}
stackparas = 512; {minimum paragraphs to reserve for stack during read-in}
ss = 9; {sort switchover from quick to bubble}
toklen = 64; {max length of a command line token}
maxtok = 4; {max number of tokens on command line}
optiondelim = '-'; {char used to introduce command line options}
TYPE
linebuf = STRING[255];
lineptr = ^byte;
linearray = ARRAY[1..maxfile] OF lineptr;
VAR
lines : linearray; {pointers to each text line stored here}
nlines : Integer; {number of lines}
showstats, status, partial, upper, reverse : Boolean; {option flags}
numtocopy, begincol, endcol : Integer; {option values}
reg : RECORD {register variable}
ax, bx, cx, dx, bp, si, di, ds, es, flags : Integer;
END;
PROCEDURE breakhalt;
{-executed when break is detected}
{exit with return code 1}
BEGIN
INLINE(
$B8/$01/$4C/ {mov ax,4c01}
$CD/$21 {int 21}
);
END; {breakhalt}
PROCEDURE setbreak;
{-set the ctrl-break address to a process exit handler}
BEGIN
reg.ax := $2523;
reg.ds := CSeg;
reg.dx := Ofs(breakhalt);
MsDos(reg);
END; {setbreak}
PROCEDURE checkkeys;
{-capture ^C, ^S, ^Q}
{note that just calling keypressed should trigger int 23 on control-break}
VAR
c : Char;
BEGIN
WHILE KeyPressed DO BEGIN
Read(Kbd, c);
IF c = ^S THEN
REPEAT
Read(Kbd, c);
IF c = ^C THEN breakhalt;
UNTIL c = ^Q
ELSE IF c = ^C THEN
breakhalt;
END;
END; {checkkeys}
FUNCTION iostat(bit : Integer) : Boolean;
{-check status of the standard I/O}
{bit=0 for input, 1 for output}
{returns true if I or O is through console}
BEGIN
reg.ax := $4400;
reg.bx := bit; {standard input or output}
MsDos(reg);
iostat := ((reg.dx AND 1) <> 0);
END; {iostat}
PROCEDURE putline(VAR l : linebuf; VAR lptr : lineptr);
{-store a string in a contiguous array in the heap}
{increment the position cursor nextpos}
{halt if the string won't fit}
VAR
len : Byte ABSOLUTE l;
tlen : Byte;
space : Integer;
BEGIN
tlen := len+1; {length of string including length byte}
space := MaxAvail;
IF (space < 0) OR ((space-stackparas) > (1+(tlen SHR 4))) THEN BEGIN
{enough space left to add string}
GetMem(lptr, tlen);
Move(l, lptr^, tlen);
END ELSE BEGIN
WriteLn(Con);
WriteLn(Con, 'not enough memory left to store text....');
Halt;
END;
END; {putline}
FUNCTION getline(lptr : lineptr) : linebuf;
{-get a string back from the contiguous heap array}
VAR
bytestomove : Byte;
l : linebuf;
BEGIN
bytestomove := lptr^+1;
Move(lptr^, l, bytestomove);
getline := l;
END; {getline}
PROCEDURE readinfile(VAR nlines : Integer);
{-read lines from standard input and put the text on the heap}
VAR
l : linebuf;
BEGIN
nlines := 0;
IF status THEN BEGIN
Write(Con, ^M); ClrEol;
Write(Con, 'READING ');
END;
WHILE NOT EoF DO BEGIN
{read line}
ReadLn(l);
IF nlines < maxfile THEN BEGIN
nlines := nlines+1;
IF status AND (nlines AND 31 = 0) THEN
Write(Con, ^H^H^H^H^H, nlines:5);
checkkeys;
{store pointer into text heap}
{store line on text heap}
putline(l, lines[nlines]);
END ELSE BEGIN
WriteLn(Con);
WriteLn(Con, 'Exceeded maximum number of lines....');
Halt;
END;
END;
END; {readinfile}
PROCEDURE writeoutfile(nlines : Integer);
{-write the sorted information out}
VAR
i : Integer;
l : linebuf;
BEGIN
IF showstats THEN Write(Con, 'WRITING ');
FOR i := 1 TO nlines DO BEGIN
{for unknown reason, cannot put getline inside of writeln for DOS 2.1}
l := getline(lines[i]);
WriteLn(l);
IF showstats AND (i MOD 32 = 0) THEN
Write(Con, ^H^H^H^H^H, i:5);
checkkeys;
END;
IF showstats THEN BEGIN
Write(Con, ^M); ClrEol;
END;
END; {writeoutfile}
PROCEDURE Swap(VAR x, y : lineptr);
{-swap two array pointers}
VAR
temp : lineptr;
BEGIN
temp := x;
x := y;
y := temp;
END; {swap}
PROCEDURE mixlines(nlines : Integer);
{-randomize record order to aid quicksort with semi-presorted lists}
VAR
i : Integer;
BEGIN
FOR i := 1 TO nlines DO
Swap(lines[i], lines[1+Random(nlines)]);
END; {mixlines}
PROCEDURE stupcase(VAR l : linebuf);
{-return uppercase of a string}
VAR
i : Byte;
BEGIN
FOR i := 1 TO Length(l) DO l[i] := UpCase(l[i]);
END; {stupcase}
FUNCTION lessthan(l1, l2 : linebuf) : Boolean;
{-return true if l1<l2 under the option assumptions}
BEGIN
IF upper THEN BEGIN
stupcase(l1);
stupcase(l2);
END;
IF partial THEN BEGIN
l1 := Copy(l1, begincol, numtocopy);
l2 := Copy(l2, begincol, numtocopy);
END;
IF reverse THEN
lessthan := (l1 > l2)
ELSE
lessthan := (l1 < l2)
END; {lessthan}
FUNCTION equal(l1, l2 : linebuf) : Boolean;
{-return true if l1=l2 under the option assumptions}
BEGIN
IF upper THEN BEGIN
stupcase(l1);
stupcase(l2);
END;
IF partial THEN BEGIN
l1 := Copy(l1, begincol, numtocopy);
l2 := Copy(l2, begincol, numtocopy);
END;
equal := (l1 = l2);
END; {equal}
PROCEDURE bubblesort(k, l : Integer);
{-simple n**2 sort good for short lists}
VAR
i, j : Integer;
BEGIN
FOR i := k TO (l-1) DO
FOR j := l DOWNTO (i+1) DO
IF lessthan(getline(lines[j]), getline(lines[j-1])) THEN
Swap(lines[j], lines[j-1]);
END; {bubblesort}
PROCEDURE quicksort(i, j : Integer);
{-fast sorting algorithm modified to be hybrid with bubblesort}
VAR
pivot : linebuf;
k, pivotindex, ramleft : Integer;
enoughram : Boolean;
PROCEDURE writestatus(i, j : Integer);
{-provide some reassurance that sort is proceeding}
BEGIN
Write(Con, ^H^H^H^H^H); ClrEol;
{prints size of current partition being sorted}
Write(Con, (j-i):5);
END; {writestatus}
FUNCTION findpivot(i, j : Integer) : Integer;
{-part of quicksort}
VAR
firstkey, l : linebuf;
k : Integer;
BEGIN
firstkey := getline(lines[i]);
FOR k := (i+1) TO j DO BEGIN
l := getline(lines[k]);
IF lessthan(l, firstkey) THEN BEGIN
findpivot := i;
Exit;
END ELSE IF NOT(equal(l, firstkey)) THEN BEGIN
findpivot := k;
Exit;
END;
END;
findpivot := 0;
END; {findpivot}
FUNCTION partition(i, j : Integer; VAR pivot : linebuf) : Integer;
{-part of quicksort}
VAR
l, r : Integer;
BEGIN
l := i;
r := j;
REPEAT
Swap(lines[l], lines[r]);
WHILE lessthan(getline(lines[l]), pivot) DO l := l+1;
WHILE NOT(lessthan(getline(lines[r]), pivot)) DO r := r-1;
UNTIL l > r;
partition := l;
END; {partition}
BEGIN {quicksort}
checkkeys; {check for a break}
IF status THEN writestatus(i, j);
pivotindex := findpivot(i, j);
IF pivotindex <> 0 THEN BEGIN
pivot := getline(lines[pivotindex]);
k := partition(i, j, pivot);
ramleft := MemAvail;
enoughram := (ramleft < 0) OR (ramleft > 32);
IF ((k-1-i) > ss) AND enoughram THEN
quicksort(i, k-1)
ELSE
bubblesort(i, k-1);
IF ((j-k) > ss) AND enoughram THEN
quicksort(k, j)
ELSE
bubblesort(k, j);
END;
END; {quicksort}
PROCEDURE writehelp;
{-display a help screen}
BEGIN
WriteLn(Con);
WriteLn(Con, 'Usage: BIGSORT [Options] <InputPathname [>OutputPathName]');
LowVideo;
WriteLn(Con);
WriteLn(Con, 'Sort limited in size only by available RAM.');
WriteLn(Con, '384K free RAM will sort a 256K file of 7000 lines.');
WriteLn(Con, 'Each text line limited to 255 characters and must be terminated by a <CR><LF>.');
WriteLn(Con, 'Maximum of 15000 text lines.');
WriteLn(Con, 'Input text is automatically randomized to avoid presorting problems.');
WriteLn(Con);
NormVideo;
WriteLn(Con, 'Options:');
LowVideo;
WriteLn(Con, ' -I Ignore case while sorting');
WriteLn(Con, ' -R sort in Reverse order');
WriteLn(Con, ' -Bn Begin sort key with column n of each line (default 1)');
WriteLn(Con, ' -En End sort key with column n of each line (default end of line)');
WriteLn(Con, ' -Q Quiet mode. No status during sort');
NormVideo;
END; {writehelp}
PROCEDURE setoptions;
{-analyze input line}
VAR
i, code : Integer;
num : STRING[6];
arg : STRING[64];
BEGIN
{set defaults}
upper := False; reverse := False; status := True;
begincol := 1; endcol := 255; partial := False;
WriteLn(Con);
{scan through argument list}
i := 1;
WHILE i <= ParamCount DO BEGIN
arg := ParamStr(i);
IF (arg[1] = optiondelim) AND (Length(arg) > 1) THEN BEGIN
CASE UpCase(arg[2]) OF
'I' : upper := True;
'R' : reverse := True;
'Q' : status := False;
'B' : BEGIN
num := Copy(arg, 3, 6);
Val(num, begincol, code);
IF code <> 0 THEN BEGIN
WriteLn(Con, 'Illegal Begin column specified: ', arg);
writehelp;
Halt;
END;
IF (begincol <= 0) OR (begincol > 255) THEN BEGIN
WriteLn(Con, 'Illegal Begin column specified: ', arg);
WriteLn(Con, ' column must be in the range of 1..255');
writehelp;
Halt;
END;
IF begincol > 1 THEN partial := True;
END;
'E' : BEGIN
num := Copy(arg, 3, 6);
Val(num, endcol, code);
IF code <> 0 THEN BEGIN
WriteLn(Con, 'Illegal End column specified: ', arg);
writehelp;
Halt;
END;
IF (endcol <= 0) OR (endcol > 255) THEN BEGIN
WriteLn(Con, 'Illegal End column specified: ', arg);
WriteLn(Con, ' column must be in the range of 1..255');
writehelp;
Halt;
END;
IF endcol < 255 THEN partial := True;
END;
ELSE
WriteLn(Con, 'Unrecognized command line option: ', arg);
writehelp;
Halt;
END;
END ELSE BEGIN
WriteLn(Con, 'Unrecognized command line option: ', arg);
writehelp;
Halt;
END;
i := i+1;
END;
numtocopy := endcol-begincol+1;
showstats := status AND NOT(iostat(1));
END; {setoptions}
FUNCTION ramavail : Real;
{-return the number of bytes of RAM left for heap and stack}
VAR
t : Real;
BEGIN
t := MaxAvail;
IF t < 0 THEN t := 65536.0+t;
ramavail := 16.0*t;
END; {ramavail}
BEGIN {main}
IF iostat(0) THEN BEGIN
WriteLn(Con);
WriteLn(Con, 'input must be redirected from a file....');
writehelp;
Halt;
END;
{analyze command line options}
setoptions;
{make sure we can break out}
setbreak;
IF status THEN
WriteLn(Con, 'Total RAM for heap and stack: ', ramavail:6:0);
{read in the input file}
readinfile(nlines);
IF status THEN BEGIN
Write(Con, ^M); ClrEol;
WriteLn(Con, 'RAM left for recursion stack: ', ramavail:6:0);
WriteLn(Con, 'Total lines: ', nlines);
END;
{randomize records}
IF status THEN Write(Con, 'RANDOMIZING');
mixlines(nlines);
{sort}
IF status THEN BEGIN
Write(Con, ^M); ClrEol;
Write(Con, 'SORTING ');
END;
IF nlines > ss THEN quicksort(1, nlines) ELSE bubblesort(1, nlines);
IF status THEN BEGIN
Write(Con, ^M); ClrEol;
END;
{write out the results}
writeoutfile(nlines);
END.